#=====Lecture 10 r code: linear models
metals <- read.table("http://lib.stat.cmu.edu/DASL/Datafiles/Pottery.html", header=T)
head(metals, n=3)
table(metals$Site)  #how many samples from each site
tapply(X=metals$Fe, INDEX=metals$Site, FUN=mean)

#now run the ANOVA model 
plot(Fe ~ Site, data=metals)  #try the default plot
Fe.aov <- aov(Fe~Site, data=metals)
summary(Fe.aov)

#alternative method of running ANOVA
Fe.lm <- lm(Fe~Site, data=metals)
anova(Fe.lm)
summary(Fe.lm) #more output from lm()

#Tukeys honest significant difference test
Fe.aov <- aov(Fe~Site, data=metals)
TukeyHSD(Fe.aov)
plot(TukeyHSD(Fe.aov))

#Tests
table(metals$Site)

#=======In-class exercise 1
metals <- read.table("metals.txt", header=T)
Fe.lm <- lm(Fe~Site, data=metals)
names(Fe.lm)  #find out how to get the residuals
Fe.lm$residuals

#use a qq-plot to see if it is normal
qqnorm(Fe.lm$residuals)
qqline(Fe.lm$residuals) 

library(MASS) 
#Kolmogorov-Smirnov test
sigma <- summary(Fe.lm)$sigma
?ks.test

#here the ... means parameters of cumulative distribution function
#i.e. pnorm(mean=0, sd=sigma)
ks.test(x=Fe.lm$residuals, y=pnorm, mean=0, sd=sigma)

#shapiro test
shapiro.test(Fe.lm$residuals) 

#equal group variances?
boxplot(metals$Fe~metals$Site)
boxplot(Fe~Site, data=metals)

#Bartlett's test
bartlett.test(metals$Fe, metals$Site)  

#Fligner test
fligner.test(metals$Fe, metals$Site) 

#Conclusion: 
#cannot reject assumption of normality, 
#or assumption of equal variance

#=========ANCOVA/linear regression
marmot <- read.table("Data//marmot.txt", header=T)
head(marmot)

#interaction plots
interaction.plot(x.factor=marmot$loc, trace.factor=marmot$type, 
                 response=marmot$len, trace.label = "Type",
                 xlab = "Location", lwd=2,  
                 ylab = "Mean of Marmot Whistle Length")

#plot with different colors for different threat types
plot(marmot$dist, marmot$len, xlab = "Distance from challenge", 
     ylab = "Length of whistles", type = "n")  #"n" means "do not plot"!
points(marmot$dist[marmot$type == "Dog"], 
       marmot$len[marmot$type == "Dog"],pch = 17, col = "blue")
points(marmot$dist[marmot$type == "Human"],
       marmot$len[marmot$type == "Human"],pch = 18, col = "red")
points(marmot$dist[marmot$type == "RCPlane"], 
       marmot$len[marmot$type == "RCPlane"], pch = 19, col = "green")
legend("bottomleft", bty = 'n', levels(marmot$type), 
       col = c("blue", "red", "green"), pch = 17:19)

#model with interactions
interactionModel <- lm(len ~ loc + type*dist, data=marmot)
interactionModel
summary(interactionModel)

#components of the summary output stored in a list
names(interactionModel)


#what the formula of the call was
interactionModel$call

#ANOVA partial f-test
interactionModel <- lm(len ~ loc + type*dist, data=marmot)
nonInteractionModel <- lm(len ~ loc + type + dist, data = marmot)
anova(nonInteractionModel,interactionModel)

#=============Hands-on exercise 2
AIC(nonInteractionModel, interactionModel)
#or one at a time
AIC(nonInteractionModel)
AIC(interactionModel)

logLik(nonInteractionModel)
logLik(interactionModel)

n <- nrow(marmot)  #number of data points

#number of fitted coefficients PLUS variance (+1)
#that is the number of parameters in the model
#also logLik returns "df" which is number of pars
p1 <- length(nonInteractionModel$coefficients) + 1 
p2 <- length(interactionModel$coefficients) + 1 
p1
p2
AIC.nonIM <- -2*logLik(nonInteractionModel)+2*p1
AIC.IM <- -2*logLik(interactionModel)+2*p2
AIC.nonIM
AIC.IM  #lower AIC = better model
   
#AIC corrected
AICc.nonIM <- -2*logLik(nonInteractionModel)+
   2*p1*(n/(n-p1-1))
AICc.IM <- -2*logLik(interactionModel)+
   2*p2*(n/(n-p2-1))
AICc.nonIM
AICc.IM  #lower AIC = better model

#still favor the model with the interaction term
#since it has lower AICc


#=========checking model assumptions
plot(interactionModel)
plot(interactionModel, which=2)

#confidence intervals for parameters
round(confint(interactionModel),6)

# example making predictions

( newobs <- data.frame(rep = c(3, 1), dist = c(10, 13), type = rep("Dog", 2), loc = rep("A", 2)) )

predict(interactionModel, newobs, interval = "confidence")
predict(interactionModel, newobs, interval = "prediction")